#include <mpi.h>

#include <cstddef>
#include <cstring>
#include <type_traits>
#include <vector>
/// Maps a C++ scalar type to the corresponding predefined MPI datatype handle.
///
/// cv-qualifiers on T are ignored, so `const int` maps the same way as `int`.
///
/// @tparam T  Scalar type to look up.
/// @return The matching MPI handle, or MPI_DATATYPE_NULL when T has no mapping
///         here (previously control fell off the end of the function, which
///         is undefined behaviour).
template<typename T>
MPI_Datatype get_type(){
  typedef typename std::remove_cv<T>::type U;

  if (std::is_same<U,int>::value)                return MPI_INT;
  if (std::is_same<U,float>::value)              return MPI_FLOAT;
  if (std::is_same<U,double>::value)             return MPI_DOUBLE;
  if (std::is_same<U,short>::value)              return MPI_SHORT;
  if (std::is_same<U,long>::value)               return MPI_LONG;
  if (std::is_same<U,long long>::value)          return MPI_LONG_LONG;
  if (std::is_same<U,unsigned short>::value)     return MPI_UNSIGNED_SHORT;
  if (std::is_same<U,unsigned>::value)           return MPI_UNSIGNED;
  if (std::is_same<U,unsigned long>::value)      return MPI_UNSIGNED_LONG;
  if (std::is_same<U,unsigned long long>::value) return MPI_UNSIGNED_LONG_LONG;

  // Unsupported type: hand back a well-defined "no type" handle so that the
  // failure surfaces as an MPI error instead of an indeterminate return value.
  return MPI_DATATYPE_NULL;
}

/// Rounds a byte offset up to the next multiple of sizeof(T).
///
/// The rounding is done with a bit mask, so the result is only a true
/// multiple of sizeof(T) when sizeof(T) is a power of two.
///
/// @tparam T         Type whose size is used as the alignment.
/// @param  cur_size  Current byte offset. Taken as size_t (it used to be int)
///                   so offsets passed in as size_t are no longer narrowed.
/// @return The smallest offset >= cur_size that is a multiple of sizeof(T).
template <typename T>
size_t align(size_t cur_size) {
  const size_t mask = sizeof(T) - 1;
  return (cur_size + mask) & ~mask;
}
/// Base case: number of bytes needed once one more value of type T0 has been
/// placed after `cur_size` bytes, with T0 aligned via align<T0>().
///
/// @param cur_size  Bytes already occupied. Taken as size_t (it used to be
///                  int) so the running offset is not narrowed on recursion.
/// @param arg0      Only its type is used; the value is ignored.
/// @return Offset one past the end of the T0 slot.
template <typename T0>
size_t estimate_size(size_t cur_size, T0 arg0) {
  (void)arg0;
  return align<T0>(cur_size) + sizeof(T0);
}

/// Recursive case: reserves an aligned slot for T0 after `cur_size` bytes and
/// then accounts for the remaining arguments in order.
///
/// Both overloads take the same first-parameter type on purpose: if they
/// differed, overload ranking would prefer this variadic overload for the
/// final one-argument step and the recursion would no longer terminate on
/// the base case.
///
/// @param cur_size  Bytes already occupied.
/// @param arg0      Only its type is used; the value is ignored.
/// @param args      Remaining values, laid out after arg0.
/// @return Total bytes needed to hold every argument.
template <typename T0, typename... Ts>
size_t estimate_size(size_t cur_size, T0 arg0, Ts... args){
  (void)arg0;
  return estimate_size(align<T0>(cur_size) + sizeof(T0), args...);
}
/// Base case: stores one value into the byte buffer and records its MPI
/// struct field description (type, displacement, block length).
///
/// @param types     Receives the MPI datatype of the field (one entry written).
/// @param disps     Receives the field's byte displacement inside `data`.
/// @param lengths   Receives the block length, always 1.
/// @param data      Destination byte buffer.
/// @param cur_disp  Offset of the first free byte in `data`.
/// @param arg0      Value to store.
template <typename T0>
void pack_args(MPI_Datatype *types, MPI_Aint *disps, int *lengths, char *data, size_t cur_disp, T0 arg0) {
  const size_t aligned_disp = align<T0>(cur_disp);

  *types = get_type<T0>();
  *disps = static_cast<MPI_Aint>(aligned_disp);
  *lengths = 1;

  // Copy the bytes rather than storing through a casted T0*: the char buffer
  // carries no alignment guarantee for T0, and memcpy has no such requirement.
  std::memcpy(data + aligned_disp, &arg0, sizeof(T0));
}
/// Recursive case: stores `arg0` at the next aligned offset, records its MPI
/// struct field description, then packs the remaining arguments after it.
///
/// @param types     Array receiving one MPI datatype per argument.
/// @param disps     Array receiving one byte displacement per argument.
/// @param lengths   Array receiving one block length (always 1) per argument.
/// @param data      Destination byte buffer.
/// @param cur_disp  Offset of the first free byte in `data`.
/// @param arg0      Value stored by this step.
/// @param args      Values stored by the following steps.
template <typename T0, typename... Ts>
void pack_args(MPI_Datatype *types, MPI_Aint *disps, int *lengths, char *data, size_t cur_disp, T0 arg0, Ts... args) {
  const size_t aligned_disp = align<T0>(cur_disp);

  *types = get_type<T0>();
  *disps = static_cast<MPI_Aint>(aligned_disp);
  *lengths = 1;

  // Copy the bytes rather than storing through a casted T0*: the char buffer
  // carries no alignment guarantee for T0, and memcpy has no such requirement.
  std::memcpy(data + aligned_disp, &arg0, sizeof(T0));

  // Advance every output array by one entry and continue after this slot.
  pack_args(types + 1, disps + 1, lengths + 1, data, aligned_disp + sizeof(T0), args...);
}
/// Base case: reads one value of type T0 back out of the byte buffer.
///
/// @param data      Source byte buffer.
/// @param cur_disp  Offset of the first unread byte; the value is read from
///                  align<T0>(cur_disp).
/// @param arg0      Receives the value.
template <typename T0>
void unpack_args(char *data, size_t cur_disp, T0 &arg0) {
  const size_t aligned_disp = align<T0>(cur_disp);
  // memcpy instead of dereferencing a casted T0*: the buffer may not be
  // suitably aligned for T0.
  std::memcpy(&arg0, data + aligned_disp, sizeof(T0));
}
/// Recursive case: reads `arg0` from the next aligned offset, then reads the
/// remaining arguments from the bytes that follow it.
///
/// @param data      Source byte buffer.
/// @param cur_disp  Offset of the first unread byte.
/// @param arg0      Receives the value read by this step.
/// @param args      Receive the values read by the following steps.
template <typename T0, typename... Ts>
void unpack_args(char *data, size_t cur_disp, T0 &arg0, Ts &...args) {
  const size_t aligned_disp = align<T0>(cur_disp);
  // memcpy instead of dereferencing a casted T0*: the buffer may not be
  // suitably aligned for T0.
  std::memcpy(&arg0, data + aligned_disp, sizeof(T0));
  unpack_args(data, aligned_disp + sizeof(T0), args...);
}
/// Base case: adds the T0 field found in `in` to the same field in `inout`.
///
/// @tparam T0       Type of the field.
/// @param in        Byte buffer holding the addend.
/// @param inout     Byte buffer holding the accumulator; updated in place.
/// @param cur_disp  Offset of the first unprocessed byte in both buffers.
/// @return Offset one past the end of the processed field.
template <typename T0>
size_t sum_args(char *in, char *inout, size_t cur_disp) {
  const size_t aligned_disp = align<T0>(cur_disp);

  // Go through local copies: the buffers are raw bytes with no alignment
  // guarantee for T0, so the values are moved with memcpy rather than
  // accessed through casted pointers.
  T0 accumulator;
  T0 addend;
  std::memcpy(&accumulator, inout + aligned_disp, sizeof(T0));
  std::memcpy(&addend, in + aligned_disp, sizeof(T0));
  accumulator += addend;
  std::memcpy(inout + aligned_disp, &accumulator, sizeof(T0));

  return aligned_disp + sizeof(T0);
}
/// Recursive case: adds the T0 field of `in` into `inout`, then processes the
/// remaining field types (T1, Ts...) in the bytes that follow.
///
/// @tparam T0       Type of the field handled by this step.
/// @tparam T1, Ts   Types of the fields handled by the following steps.
/// @param in        Byte buffer holding the addends.
/// @param inout     Byte buffer holding the accumulators; updated in place.
/// @param cur_disp  Offset of the first unprocessed byte in both buffers.
/// @return Offset one past the end of the last processed field.
template <typename T0, typename T1, typename ...Ts>
size_t sum_args(char *in, char *inout, size_t cur_disp) {
  const size_t aligned_disp = align<T0>(cur_disp);

  // Go through local copies: the buffers are raw bytes with no alignment
  // guarantee for T0, so the values are moved with memcpy rather than
  // accessed through casted pointers.
  T0 accumulator;
  T0 addend;
  std::memcpy(&accumulator, inout + aligned_disp, sizeof(T0));
  std::memcpy(&addend, in + aligned_disp, sizeof(T0));
  accumulator += addend;
  std::memcpy(inout + aligned_disp, &accumulator, sizeof(T0));

  return sum_args<T1, Ts...>(in, inout, aligned_disp + sizeof(T0));
}
template <typename ...Ts>
void sum_hybrid(void *in, void *inout, int *len, MPI_Datatype *datatype){
  char *cin = (char*)in;
  char *cinout = (char*)inout;
  for (int i = 0; i < *len; i ++) {
    size_t consumed = sum_args<Ts...>(cin, cinout, 0);
    cin += consumed;
    cinout += consumed;
  }
}
template <typename... Ts> 
void vreduce(Ts &...args){
  char data[estimate_size(0, args...)];
  MPI_Datatype ts[sizeof...(Ts)];
  MPI_Aint disps[sizeof...(Ts)];
  int lengths[sizeof...(Ts)];
  pack_args(ts, disps, lengths, data, 0, args...);

  static MPI_Datatype reduction_type = 0;
  static MPI_Op sumop = 0;

  if (reduction_type == 0) {
    MPI_Type_create_struct(sizeof...(Ts), lengths, disps, ts, &reduction_type);
    MPI_Type_commit(&reduction_type);
    MPI_Op_create(sum_hybrid<Ts...>, 1, &sumop );
  }

  MPI_Allreduce(MPI_IN_PLACE, data, 1, reduction_type, sumop, MPI_COMM_WORLD);
  unpack_args(data, 0, args...);
}
